use BGT_2007_2010_2012_withvars_122815.dta, clear
capture drop union_member2006 union_member_pct2006 tot_emp2006 union_rep2006 union_rep_pct2006 replace_rate dum_train
merge m:1 statefip occsoc using bls_state_occupation_employment_projections.dta , keep(master match) nogen


rename turnover_rate ann_turnover_st_occ
label variable ann_turnover_st_occ "annual turnover rate 2013-2015 (annual_change/employment, 2013)"

merge m:1 occsoc using bls_mechanism_covariates.dta, keep(master match) nogen
rename educ onet_educ_req
label variable onet_educ_req "ONET specified education requirements"

encode training, gen(train)
fre train
drop training
gen d_train=0
replace d_train=1 if inlist(train, 1,2)
label variable d_train "dummy: indicates moderate/longterm training"



preserve
import excel using hiring_costs_time_to_fill.xlsx, sheet("Sheet2") firstrow clear
tempfile a 
save `a', replace

import excel using hiring_costs_time_to_fill.xlsx, sheet("Sheet3") firstrow clear 
tostring occsoc, replace
merge m:1 match using `a', keep(match) nogen 
drop D
rename Position job_name_cornell
label variable job_name_cornell "job title mapped to in cornell data"


tempfile a
save `a', replace
restore

merge m:1 occsoc using `a', keep(master match) nogen


merge m:1 statefip using union_by_state_2006_2007.dta, keep(master match) nogen


import excel sort occsoc title newcode using soc2010_cps_crosswalk.xlsx, cellrange(a6:e825) allstring clear
replace occsoc=subinstr(occsoc, "-","",.)
destring newcode, replace
merge m:1 newcode using union_by_detailed_occupation.dta, keep(match) nogen

preserve
	import excel soc2000 soc2000_name soc2010 soc2010_name using soc_2000_to_2010_crosswalk.xlsx, clear cellrange(A9:D868) allstring
	drop if soc2000==soc2010
	duplicates tag soc2000, gen(dup00)
	duplicates tag soc2010, gen(dup10)
	keep if dup10==0 & dup00!=0

	gen occsoc=soc2010
	tempfile a
	save `a', replace
restore

merge m:1 occsoc using `a', keepusing(soc2000 soc2000_name)
replace occsoc=soc2000 if _m==3

destring employment members covered, ignore(",") replace
collapse (sum) employment members covered, by(occsoc)
gen union_occsoc_mem=(members/employment)*100
gen union_occsoc_cov=(covered/employment)*100


tempfile a 
save `a', replace


use BGT_2007_2010_2012_withvars_122815_mechanisms.dta, clear 
merge m:1 occsoc using `a', nogen keep(master match) keepusing(union*)



save BGT_2007_2010_2012_withvars_122815_mechanisms.dta, replace 



************************************

use union_by_detailed_occupation.dta, clear
gen occsoc=oldcode

gen occsoc4=substr(oldcode,1,4)
destring employment members covered, ignore(",") replace
collapse (sum) employment members covered, by(occsoc4)
gen union_occsoc_mem=(members/employment)*100
gen union_occsoc_cov=(covered/employment)*100


preserve
	import excel soc2000 soc2000_name soc2010 soc2010_name using $pathUPS/labour_paper/crosswalk/soc_2000_to_2010_crosswalk.xlsx, clear cellrange(A9:D868) allstring
	drop if soc2000==soc2010
	duplicates tag soc2000, gen(dup00)
	duplicates tag soc2010, gen(dup10)
	keep if dup10==0 & dup00!=0

	gen occsoc=soc2010
	tempfile a
	save `a', replace
restore

merge m:1 occsoc using `a', keepusing(soc2000 soc2000_name)
replace occsoc=soc2000 if _m==3

list soc2000 occsoc if _m==2
drop if _m==2

local code 131071
local code5=substr("`code'",1,5)
list occsoc _m if substr(occsoc,1,5)=="`code5'"
replace occsoc="131079" if substr(occsoc,1,5)=="`code5'"

local code 151152
local code5=substr("`code'",1,5)
list occsoc _m if substr(occsoc,1,5)=="`code5'"
replace occsoc="151081" if substr(occsoc,1,5)=="`code5'"

local code 252051
local code5=substr("`code'",1,5)
list occsoc _m if substr(occsoc,1,5)=="`code5'"
replace occsoc="252041" if substr(occsoc,1,5)=="`code5'"

local code 291161
local code5=substr("`code'",1,4)
list occsoc _m if substr(occsoc,1,4)=="`code5'"
replace occsoc="252041" if substr(occsoc,1,4)=="`code5'"

local code 292057
local code5=substr("`code'",1,5)
list occsoc _m if substr(occsoc,1,5)=="`code5'"
replace occsoc="292099" if substr(occsoc,1,5)=="`code5'"

local code 292092
local code5=substr("`code'",1,5)
list occsoc _m if substr(occsoc,1,5)=="`code5'"
replace occsoc="292099" if substr(occsoc,1,5)=="`code5'"


local code 311014
local code5=substr("`code'",1,5)
list occsoc _m if substr(occsoc,1,5)=="`code5'"
replace occsoc="311012" if substr(occsoc,1,5)=="`code5'"

collapse (mean) mem cov, by(occsoc)

list occsoc soc2000 if _m==2
drop if _m==2
list occsoc if substr(occsoc,1,5)=="13107"
replace occsoc="131079" if occsoc=="131070"
list occsoc if substr(occsoc,1,4)=="1511"
replace occsoc="151081" if occsoc=="151150"
list occsoc if substr(occsoc,1,4)=="2520"
replace occsoc="252040" if occsoc=="252050" 
list occsoc if substr(occsoc,1,4)=="2920"

preserve
gen occsoc_minor=substr(oldcode, 1,3)
foreach var of varlist employment members covered {
	replace `var'=subinstr(`var',",","",.)
}

destring  employment members covered, replace
collapse (sum) employment members covered, by(occsoc_minor)
gen union_minor_mem=members/employment
gen union_minor_cov=covered/employment
tempfile a
save `a', replace
restore

rename occsoc occsoc_union_match
keep occsoc_union_match mem cov
rename mem union_occsoc_mem
rename cov union_occsoc_cov

save union.dta, replace


use BGT_2007_2010_2012_withvars_122815_mechanisms.dta, clear
gen occsoc4=substr(occsoc,1,4)
replace occsoc4="1511" if occsoc4=="1510"
replace occsoc4="2510" if occsoc4=="2511"

merge m:1 occsoc4 using union.dta, gen(_m1) keep(master match)

reg dneed2 i.(occsoc_num year)  c.du##c.union_occsoc_cov $weight $restric & i_orig_samp==1,  cl(state)

preserve
	keep if _m1==3
	tempfile p1
	save `p1', replace
restore

keep if _m1==1

replace occsoc_union_match=substr(occsoc, 1,5) + "0"
merge m:1 occsoc_union_match using union.dta, gen(_m2) keep(master match)

preserve
	keep if _m2==3
	tempfile p2
	save `p2', replace
restore

keep if _m2==1
replace occsoc_union_match=substr(occsoc, 1,4) + "00"
replace occsoc_union_match="531000" if occsoc=="531031" | occsoc=="531021"

merge m:1 occsoc_union_match using union.dta, gen(_m3) keep(master match)

merge m:1 occsoc_minor using `a', keep(master match)

merge m:1 occsoc_union_match using `b', gen(_m1) keep(master match)


preserve
	keep if _m2==3
	tempfile p2
	save `p2', replace
restore


keep if _m2==1
replace occsoc_union_match="531000" if occsoc=="531031" | occsoc=="531021"
replace occsoc_union_match="333051" if occsoc=="333050"


merge m:1 occsoc_union_match using `b', gen(_m3) keep(master match)
append using `p1' `p2'

BGT_2007_2010_2012_withvars_122815_mechanisms.dta, replace 
